Mac Easy 2010 May

home *** CD-ROM | disk | FTP | other *** search

/ Mac Easy 2010 May / Mac Life Ubuntu.iso / casper / filesystem.squashfs / var / lib / python-support / python2.6 / rdflib / sparql / Query.pyc (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2009-04-20 | 31.9 KB | 850 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# NOTE(review): this module was recovered from decompiler output.  Control
# flow that the decompiler dropped or garbled (``_processResults``,
# ``Query._orderedSelect``, parts of ``_SPARQLNode.expandSubgraph`` and
# ``Query.clusterForward``) has been reconstructed from the surrounding
# docstrings; confirm against the upstream rdflib sources before relying on it.
import types
import sets

from rdflib import URIRef, BNode, Literal
from rdflib.Identifier import Identifier
from rdflib.util import check_subject, list2set
from rdflib.sparql import SPARQLError
from rdflib.sparql.Unbound import Unbound
from rdflib.sparql.sparqlGraph import SPARQLGraph
from rdflib.sparql.graphPattern import GraphPattern


class SessionBNode(BNode):
    """
    Special 'session' BNodes.  I.e., BNodes at the query side which refer
    to BNodes in persistence.
    """
    pass


def _checkOptionals(pattern, optionals):
    """
    The following remark in the SPARQL document is important:

    'If a new variable is mentioned in an optional block (as mbox and
    hpage are mentioned in the previous example), that variable can be
    mentioned in that block and can not be mentioned in a subsequent block.'

    What this means is that the various optional blocks do not interfere
    at this level and there is no need for a check whether a binding in a
    subsequent block clashes with an earlier optional block.

    This method checks whether this requirement is fulfilled.  Raises a
    SPARQLError exception if it is not (the rest of the algorithm relies on
    this, so checking it is a good idea...)

    @param pattern: graph pattern
    @type pattern: L{GraphPattern<rdflib.sparql.GraphPattern>}
    @param optionals: sequence of graph patterns (list or tuple)
    @type optionals: L{GraphPattern<rdflib.sparql.GraphPattern>}
    @raise SPARQLError: if the requirement is not fulfilled
    """
    for i, optional in enumerate(optionals):
        for c in optional.unbounds:
            if c in pattern.unbounds:
                # A variable of the main pattern may be reused freely.
                continue
            # Not in the main pattern: it must not have been introduced by
            # an earlier OPTIONAL block.
            for previous in optionals[:i]:
                if c in previous.unbounds:
                    raise SPARQLError('%s is an illegal query string, it appear in a previous OPTIONAL clause' % c)


def _variablesToArray(variables, name=''):
    """
    Turn an array of Variables or query strings into an array of query
    strings.  If 'variables' is in fact a single string or Variable, then
    it is also put into an array.

    @param variables: a string, a unicode, or a Variable, or an array of
    those (can be mixed, actually).  As a special case, if the value is
    "*", it returns None (this corresponds to the wildcard in SPARQL)
    @param name: the string to be used in the error message
    @return: list of variable names, or None for the "*" wildcard
    @raise SPARQLError: if the argument (or one of its items) has an
    unsupported type
    """
    if isinstance(variables, basestring):
        if variables == '*':
            return None
        return [variables]
    if isinstance(variables, Unbound):
        return [variables.name]
    if isinstance(variables, (list, tuple)):
        retval = []
        for s in variables:
            if isinstance(s, basestring):
                retval.append(s)
            elif isinstance(s, Unbound):
                retval.append(s.name)
            else:
                raise SPARQLError("illegal type in '%s'; must be a string, unicode, or a Variable" % name)
        return retval
    raise SPARQLError("'%s' argument must be a string, a Variable, or a list of those" % name)


def _createInitialBindings(pattern):
    """
    Creates an initial binding dictionary for the Graph Pattern by putting
    a None as a value for each query variable.

    @param pattern: graph pattern
    @type pattern: L{GraphPattern<rdflib.sparql.GraphPattern>}
    @return: dictionary mapping every entry of pattern.unbounds to None
    """
    bindings = {}
    for c in pattern.unbounds:
        bindings[c] = None
    return bindings


class _SPARQLNode:
    """
    The SPARQL implementation is based on the creation of a tree, each
    level for each statement in the 'where' clause of SPARQL.

    Each node maintains a 'binding' dictionary, with the variable names
    and either a None if not yet bound, or the binding itself.  The method
    'expand' tries to make one more step of binding by looking at the next
    statement: it takes the statement of the current node, binds the
    variables if there is already a binding, and looks at the triple store
    for the possibilities.  If it finds valid new triplets, that will bind
    some more variables, and children will be created with the next
    statement in the 'where' array with a new level of bindings.  This is
    done for each triplet found in the store, thereby branching off the
    tree.

    If all variables are already bound but the statement, with the bound
    variables, is not 'true' (ie, there is no such triple in the store),
    the node is marked as 'clash' and no more expansion is made; this node
    will then be thrown away by the parent.  If I{all} children of a node
    is a clash, then it is marked as a clash itself.

    At the end of the process, the leaves of the tree are searched; if a
    leaf is such that:

      - all variables are bound
      - there is no clash

    then the bindings are returned as possible answers to the query.

    The optional clauses are treated separately: each 'valid' leaf is
    assigned an array of expansion trees that contain the optional clauses
    (that may have some unbound variables bound at the leaf, though).

    @ivar parent: parent in the tree
    @type parent: _SPARQLNode
    @ivar children: the children (in an array)
    @type children: array of _SPARQLNode
    @ivar bindings: copy of the bindings locally
    @type bindings: dictionary
    @ivar statement: the current statement
    @type statement: a (s,p,o,f) tuple ('f' is the local filter or None)
    @ivar rest: the rest of the statements (an array)
    @ivar clash: initialized to False
    @type clash: Boolean
    @ivar bound: True or False depending on whether all variables are
    bound in self.binding
    @type bound: Boolean
    @ivar optionalTrees: expansion trees for optional statements
    @type optionalTrees: array of _SPARQLNode instances
    """

    def __init__(self, parent, bindings, statements, tripleStore):
        """
        @param parent: parent node
        @param bindings: a dictionary with the bindings that are already
        done or with None value if no binding yet
        @param statements: array of statements from the 'where' clause.
        The first element is for the current node, the rest for the
        children.  If empty, then no expansion occurs (ie, the node is a
        leaf)
        @param tripleStore: the 'owner' triple store
        @type tripleStore: L{sparqlGraph<rdflib.sparql.sparqlGraph.sparqlGraph>}
        """
        self.tripleStore = tripleStore
        self.bindings = bindings
        self.optionalTrees = []
        # The node is fully bound when no variable is still mapped to None.
        self.bound = None not in bindings.values()
        self.clash = False
        self.parent = parent
        self.children = []
        if len(statements) > 0:
            self.statement = statements[0]
            self.rest = statements[1:]
        else:
            self.statement = None
            self.rest = None

    def returnResult(self, select):
        """
        Collect the result by searching the leaves of the tree.  The
        variables in the select are exchanged against their bound
        equivalent (if applicable).  This action is done on the valid leaf
        nodes only, the intermediate nodes only gather the children's
        results and combine it in one array.

        @param select: the array of unbound variables in the original
        select that do not appear in any of the optionals.  If None, the
        full binding should be considered (this is the case for the
        SELECT * feature of SPARQL)
        @returns: an array of dictionaries with non-None bindings.
        """
        if self.children:
            # Intermediate node: concatenate whatever the children found.
            retval = []
            for c in self.children:
                retval.extend(c.returnResult(select))
            return retval

        if not self.bound or self.clash:
            return []

        if select:
            result = {}
            for a in select:
                if a in self.bindings:
                    result[a] = self.bindings[a]
        else:
            result = self.bindings.copy()
        retval = [result]

        # Each optional tree "multiplies" the results collected so far:
        # every existing row is combined with every row of the optional.
        # A tree without results leaves the rows untouched.
        for optTree in self.optionalTrees:
            optionals = optTree.returnResult(select)
            if not optionals:
                continue
            newRetval = []
            for optResult in optionals:
                for res in retval:
                    dct = res.copy()
                    for k in optResult:
                        if optResult[k] is not None:
                            dct[k] = optResult[k]
                    newRetval.append(dct)
            retval = newRetval
        return retval

    def expandSubgraph(self, subTriples, pattern):
        """
        Method used to collect the results.  There are two ways to invoke
        the method:

          - if the pattern argument is not None, then this means the
          construction of a separate triple store with the results.  This
          means taking the bindings in the node, and constructing the graph
          via the
          L{construct<rdflib.sparql.graphPattern.GraphPattern.construct>}
          method.  This happens on the valid leafs; intermediate nodes call
          the same method recursively
          - otherwise, a leaf returns an array of the bindings, and
          intermediate methods aggregate those.

        In both cases, leaf nodes may successively expand the optional
        trees that they may have.

        @param subTriples: the triples so far
        @type subTriples: L{sparqlGraph<rdflib.sparql.sparqlGraph.sparqlGraph>}
        @param pattern: a graph pattern used to construct a graph
        @type pattern: L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>}
        @return: if pattern is None, an array of binding dictionaries;
        a clashing or unbound leaf returns an empty array; otherwise None
        """
        def b(r, bind):
            # Only plain 'str' terms are treated as variable names here.
            if type(r) == str:
                val = bind[r]
                if val is None:
                    raise RuntimeError()
                return val
            return r

        if self.children:
            if pattern is None:
                retval = []
                for x in self.children:
                    retval.extend(x.expandSubgraph(subTriples, None))
                (s, p, o, func) = self.statement
                for bind in retval:
                    # NOTE(review): the decompiler lost the original
                    # handler.  A missing or None binding means this row
                    # cannot instantiate the statement, so it is skipped.
                    try:
                        st = (b(s, bind), b(p, bind), b(o, bind))
                    except (KeyError, RuntimeError):
                        continue
                    subTriples.add(st)
                return retval
            for x in self.children:
                x.expandSubgraph(subTriples, pattern)
        elif self.bound and not self.clash:
            # Valid leaf: the optional branches contribute as well.
            for t in self.optionalTrees:
                t.expandSubgraph(subTriples, pattern)
            if pattern is None:
                return [self.bindings]
            pattern.construct(subTriples, self.bindings)
        else:
            return []

    def _bind(self, r):
        """
        @param r: string
        @return: returns None if no bindings occurred yet, the binding
        otherwise
        """
        if isinstance(r, basestring) and not isinstance(r, Identifier):
            # Query variable: its current binding (None when unbound).
            return self.bindings[r]
        if isinstance(r, SessionBNode):
            return r
        if isinstance(r, BNode):
            return self.bindings.get(r)
        return r

    def expand(self, constraints):
        """
        The expansion itself.  See class comments for details.

        @param constraints: array of global constraining (filter) methods
        @raise AssertionError: if a graph variable is in use but the
        underlying graph has no 'quads' method
        """
        if self.statement:
            (s, p, o, func) = self.statement
            # Unbound variables become None, which the store treats as a
            # wildcard in its search methods.
            search_s = self._bind(s)
            search_p = self._bind(p)
            search_o = self._bind(o)
            store = self.tripleStore
            if store.graphVariable:
                if not hasattr(store.graph, 'quads'):
                    raise AssertionError('Graph graph patterns can only be used with Graph instances with a quad method')
                searchRT = store.graph.quads((search_s, search_p, search_o))
            else:
                searchRT = store.graph.triples((search_s, search_p, search_o))

            for tripleOrQuad in searchRT:
                if store.graphVariable:
                    (result_s, result_p, result_o, parentGraph) = tripleOrQuad
                else:
                    (result_s, result_p, result_o) = tripleOrQuad
                # Only an explicit False from the local filter rejects.
                if func is not None and func(result_s, result_p, result_o) == False:
                    continue
                new_bindings = self.bindings.copy()
                if search_s is None:
                    new_bindings[s] = result_s
                if search_p is None:
                    new_bindings[p] = result_p
                if search_o is None:
                    new_bindings[o] = result_o
                if store.graphVariable:
                    new_bindings[store.graphVariable] = parentGraph.identifier
                child = _SPARQLNode(self, new_bindings, self.rest, store)
                child.expand(constraints)
                # NOTE(review): the class documentation says clashing
                # children are thrown away, which suggests 'child.clash'
                # was intended.  The test on 'self.clash' is kept as found;
                # clashing children yield no results either way.
                if not self.clash:
                    self.children.append(child)
            if len(self.children) == 0:
                self.clash = True
        elif self.bound and not self.clash:
            # Leaf with a complete binding: apply the global filters.
            for func in constraints:
                if func(self.bindings) == False:
                    self.clash = True
                    break

    def expandOptions(self, bindings, statements, constraints):
        """
        Managing optional statements.  These affect leaf nodes only, if
        they contain 'real' results.  A separate Expansion tree is appended
        to such a node, one for each optional call.

        @param bindings: current bindings dictionary (not modified)
        @param statements: array of statements from the 'where' clause.
        The first element is for the current node, the rest for the
        children.  If empty, then no expansion occurs (ie, the node is a
        leaf).  The bindings at this node are taken into account
        (replacing the unbound variables with the real resources) before
        expansion
        @param constraints: array of constraint (filter) methods
        """
        def replace(key, resource, tupl):
            (s, p, o, func) = tupl
            if key == s:
                s = resource
            if key == p:
                p = resource
            if key == o:
                o = resource
            return (s, p, o, func)

        if self.children:
            for c in self.children:
                c.expandOptions(bindings, statements, constraints)
            return
        if not self.bound or self.clash:
            return

        # Work on a private copy: the same 'bindings' dictionary is handed
        # to every leaf of the tree, so editing it in place would leak one
        # leaf's state into the optional trees of the others.
        optBindings = bindings.copy()
        toldBNodeLookup = {}
        for key in self.bindings:
            val = self.bindings[key]
            # As in the original, BNode values are only registered while
            # there is at least one statement to rewrite.
            if statements and isinstance(val, BNode):
                toldBNodeLookup[val] = val
            statements = [replace(key, val, t) for t in statements]
            # Variables already bound at this leaf are no longer unknowns
            # of the optional pattern.
            if key in optBindings:
                del optBindings[key]
        optBindings.update(toldBNodeLookup)
        optTree = _SPARQLNode(None, optBindings, statements, self.tripleStore)
        self.optionalTrees.append(optTree)
        optTree.expand(constraints)


def _processResults(select, arr):
    """
    The result in an expansion node is in the form of an array of binding
    dictionaries.  The caller should receive an array of tuples, each
    tuple representing the final binding (or None) I{in the order of the
    original select}.  This method is the last step of processing by
    processing these values to produce the right result.

    NOTE(review): the decompiled body only contained 'retval = []' and
    'return retval'; the loops below are reconstructed from this docstring.

    @param select: the original selection list.  If None, then the binding
    should be taken as a whole (this corresponds to the SELECT * feature
    of SPARQL)
    @param arr: the array of bindings
    @type arr: an array of dictionaries
    @return: a list of tuples with the selection results
    """
    retval = []
    if select:
        for bind in arr:
            # Variables without a binding in this row are reported as None.
            retval.append(tuple([bind.get(s) for s in select]))
    else:
        for bind in arr:
            retval.append(tuple(bind.values()))
    return retval


def query(graph, selection, patterns, optionalPatterns=None, initialBindings=None):
    """
    A shorthand for the creation of a L{Query} instance, returning the
    result of a L{Query.select} right away.  Good for most of the usage,
    when no more action (clustering, etc) is required.

    @param graph: the triple store to run the query against
    @param selection: a list or tuple with the selection criteria, or a
    single string.  Each entry is a string that begins with a "?".
    @param patterns: either a
    L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>} instance or a
    list of instances thereof.  Each pattern in the list represent an 'OR'
    (or 'UNION') branch in SPARQL.
    @param optionalPatterns: either a
    L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>} instance or a
    list of instances thereof.  For each elements in the 'patterns'
    parameter is combined with each of the optional patterns and the
    results are concatenated.  The list may be empty (or None).
    @param initialBindings: optional dictionary of bindings applied on top
    of the initial (all None) bindings of every pattern
    @return: list of query results
    @rtype: list of tuples
    @raise SPARQLError: if no query object could be generated
    """
    result = queryObject(graph, patterns, optionalPatterns, initialBindings)
    if result is None:
        msg = 'Errors in the patterns, no valid query object generated; '
        if isinstance(patterns, GraphPattern):
            msg += 'pattern:\n%s' % patterns
        elif patterns:
            msg += 'pattern:\n%s\netc...' % patterns[0]
        else:
            # queryObject only returns None when there was no pattern at
            # all, so there is nothing to index into here.
            msg += 'no pattern was given'
        raise SPARQLError(msg)
    return result.select(selection)


def queryObject(graph, patterns, optionalPatterns=None, initialBindings=None):
    """
    Creation of a L{Query} instance.

    @param graph: the triple store to run the query against
    @param patterns: either a
    L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>} instance or a
    list of instances thereof.  Each pattern in the list represent an 'OR'
    (or 'UNION') branch in SPARQL.
    @param optionalPatterns: either a
    L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>} instance or a
    list of instances thereof.  For each elements in the 'patterns'
    parameter is combined with each of the optional patterns and the
    results are concatenated.  The list may be empty (or None).
    @param initialBindings: optional dictionary of bindings applied on top
    of the initial (all None) bindings of every pattern
    @return: Query object, or None if no pattern was given
    @rtype: L{Query}
    @raise SPARQLError: if an argument has the wrong type or the optional
    patterns violate the rule checked by _checkOptionals
    """
    def checkArg(arg, error):
        if arg is None:
            return []
        if isinstance(arg, GraphPattern):
            return [arg]
        if isinstance(arg, (list, tuple)):
            for p in arg:
                if not isinstance(p, GraphPattern):
                    raise SPARQLError("'%s' argument must be a GraphPattern or a list of those" % error)
            return arg
        raise SPARQLError("'%s' argument must be a GraphPattern or a list of those" % error)

    finalPatterns = checkArg(patterns, 'patterns')
    finalOptionalPatterns = checkArg(optionalPatterns, 'optionalPatterns')
    if not initialBindings:
        initialBindings = {}

    retval = None
    for pattern in finalPatterns:
        _checkOptionals(pattern, finalOptionalPatterns)
        bindings = _createInitialBindings(pattern)
        bindings.update(initialBindings)
        top = _SPARQLNode(None, bindings, pattern.patterns, graph)
        top.expand(pattern.constraints)
        for opt in finalOptionalPatterns:
            bindings = _createInitialBindings(opt)
            bindings.update(initialBindings)
            top.expandOptions(bindings, opt.patterns, opt.constraints)
        r = Query(top, graph)
        # Several patterns form a UNION: chain the per-pattern queries.
        if retval is None:
            retval = r
        else:
            retval = retval + r
    return retval


class Query:
    """
    Result of a SPARQL query.  It stores to the top of the query tree, and
    allows some subsequent inquiries on the expanded tree.  B{This class
    should not be instantiated by the user,} it is done by the
    L{queryObject<SPARQL.queryObject>} method.
    """

    def __init__(self, sparqlnode, triples, parent1=None, parent2=None):
        """
        @param sparqlnode: top of the expansion tree
        @type sparqlnode: _SPARQLNode
        @param triples: triple store
        @type triples: L{sparqlGraph<rdflib.sparql.sparqlGraph>}
        @param parent1: possible parent Query when queries are combined by
        summing them up
        @type parent1: L{Query}
        @param parent2: possible parent Query when queries are combined by
        summing them up
        @type parent2: L{Query}
        """
        self.top = sparqlnode
        self.triples = triples
        self.parent1 = parent1
        self.parent2 = parent2

    def __add__(self, other):
        """
        This may be useful when several queries are performed and one
        wants the 'union' of those.  Caveat: the triple store must be the
        same for each argument.  This method is used internally only
        anyway...

        Efficiency trick (I hope it works): the various additions on
        subgraphs are not done here; the results are calculated only if
        really necessary, ie, in a lazy evaluation manner.  This is
        achieved by storing self and the 'other' in the new object
        """
        return Query(None, self.triples, self, other)

    def _isCombined(self):
        """Tell whether this object is the lazy sum of two other queries."""
        return self.parent1 is not None and self.parent2 is not None

    def _getFullBinding(self):
        """Retrieve the full binding, ie, an array of binding dictionaries"""
        if self._isCombined():
            return self.parent1._getFullBinding() + self.parent2._getFullBinding()
        return self.top.returnResult(None)

    def _getAllVariables(self):
        """Retrieve the list of all variables, to be returned"""
        if self._isCombined():
            return list2set(self.parent1._getAllVariables() + self.parent2._getAllVariables())
        return list2set(self.top.bindings.keys())

    def _orderedSelect(self, selection, orderedBy, orderDirection):
        """
        The variant of the selection (as below) that also includes the
        sorting.  Because that is much less efficient, this is separated
        into a distinct method that is called only if necessary.  It is
        called from the L{select<select>} method.

        Because order can be made on variables that are not part of the
        final selection, this method retrieves a I{full} binding from the
        result to be able to order it (whereas the core L{select<select>}
        method retrieves from the result the selected bindings only).  The
        full binding is an array of (binding) dictionaries; the sorting
        sorts this array by comparing the bound variables in the
        respective dictionaries.  Once this is done, the final selection
        is done.

        NOTE(review): the decompiler dropped the code that builds the
        comparison function; it is reconstructed here from the parameter
        descriptions below.

        @param selection: the selected variable names as a list, or None
        for the whole binding
        @param orderedBy: either a function or a list of strings
        (corresponding to variables in the query).  If the parameter is a
        function, it must take two dictionary arguments (the binding
        dictionaries), return -1, 0, and 1, corresponding to smaller,
        equal, and greater, respectively.
        @param orderDirection: if not None, then an array of the same
        length as orderedBy, a true value meaning ascending and a false
        value descending order for the corresponding key (a single Boolean
        applies to all keys).  If None, an ascending order is used.
        @return: selection results
        @rtype: list of tuples
        @raise SPARQLError: invalid sorting arguments
        """
        fullBinding = self._getFullBinding()
        if callable(orderedBy):
            _sortBinding = orderedBy
        else:
            orderKeys = _variablesToArray(orderedBy, 'orderBy')
            if not orderKeys:
                raise SPARQLError("'orderBy' argument must name at least one variable")
            if orderDirection is None:
                directions = [True] * len(orderKeys)
            elif isinstance(orderDirection, bool):
                directions = [orderDirection] * len(orderKeys)
            elif not isinstance(orderDirection, (list, tuple)):
                raise SPARQLError("'orderDirection' argument must be a list")
            elif len(orderDirection) != len(orderKeys):
                raise SPARQLError("'orderDirection' must be of an equal length to 'orderBy'")
            else:
                directions = orderDirection

            def _sortBinding(b1, b2):
                # Compare key by key; the first key that separates the two
                # rows decides.  Missing or None values never decide.
                for key, ascending in zip(orderKeys, directions):
                    val1 = b1.get(key)
                    val2 = b2.get(key)
                    if val1 is None or val2 is None:
                        continue
                    if val1 < val2:
                        if ascending:
                            return -1
                        return 1
                    if val1 > val2:
                        if ascending:
                            return 1
                        return -1
                return 0

        # Python 2 comparison-function sort (this file targets Python 2.6).
        fullBinding.sort(_sortBinding)
        return _processResults(selection, fullBinding)

    def select(self, selection, distinct=True, limit=None, orderBy=None, orderAscend=None, offset=0):
        """
        Run a selection on the query.

        @param selection: Either a single query string, or an array or
        tuple thereof.
        @param distinct: if True, identical results are filtered out
        @type distinct: Boolean
        @param limit: if set to an integer value, the first 'limit' number
        of results are returned; all of them otherwise
        @type limit: non negative integer
        @param orderBy: either a function or a list of strings
        (corresponding to variables in the query).  If None, no sorting
        occurs on the results.  If the parameter is a function, it must
        take two dictionary arguments (the binding dictionaries), return
        -1, 0, and 1, corresponding to smaller, equal, and greater,
        respectively.
        @param orderAscend: if not None, then an array of booleans of the
        same length as orderBy, True for ascending and False for
        descending.  If None, an ascending order is used.
        @param offset: the starting point of return values in the array of
        results.  Obviously, this parameter makes real sense if some sort
        of order is defined.
        @return: selection results
        @rtype: list of tuples
        @raise SPARQLError: invalid selection, limit or offset argument
        """
        def _uniquefyList(lst):
            """
            Return a copy of the list but possible duplicate elements are
            taken out.  Used to post-process the outcome of the query.

            @param lst: input list
            @return: result list
            """
            if len(lst) <= 1:
                return lst
            if orderBy is not None:
                # The rows are sorted: keep the first occurrence of each.
                retval = []
                for v in lst:
                    if v not in retval:
                        retval.append(v)
                return retval
            return list(sets.Set(lst))

        selectionF = _variablesToArray(selection, 'selection')
        if type(offset) is not types.IntType or offset < 0:
            raise SPARQLError("'offset' argument is invalid")
        if limit is not None:
            if type(limit) is not types.IntType or limit < 0:
                raise SPARQLError("'limit' argument is invalid")

        if orderBy is not None:
            results = self._orderedSelect(selectionF, orderBy, orderAscend)
        elif self._isCombined():
            # Hand the original selection down: the normalised form is None
            # for "*", which select() itself does not accept.
            results = (self.parent1.select(selection, distinct=distinct)
                       + self.parent2.select(selection, distinct=distinct))
        else:
            results = _processResults(selectionF, self.top.returnResult(selectionF))

        if distinct:
            retval = _uniquefyList(results)
        else:
            retval = results

        if limit is not None:
            return retval[offset:limit + offset]
        if offset > 0:
            return retval[offset:]
        return retval

    def construct(self, pattern=None):
        """
        Expand the subgraph based on the pattern or, if None, the internal
        bindings.

        In the former case the binding is used to instantiate the triplets
        in the patterns; in the latter, the original statements are used
        as patterns.

        The result is a separate triple store containing the subgraph.

        @param pattern: a
        L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>} instance
        or None
        @return: a new triple store
        @rtype: L{sparqlGraph<rdflib.sparql.sparqlGraph>}
        """
        if self._isCombined():
            return self.parent1.construct(pattern) + self.parent2.construct(pattern)
        subgraph = SPARQLGraph()
        self.top.expandSubgraph(subgraph, pattern)
        return subgraph

    def ask(self):
        """
        Whether a specific pattern has a solution or not.

        @rtype: Boolean
        """
        return len(self.select('*')) != 0

    def clusterForward(self, selection):
        """
        Forward clustering, using all the results of the query as seeds
        (when appropriate).  It is based on the usage of the
        L{cluster forward<rdflib.sparql.sparqlGraph.clusterForward>}
        method for triple store.

        @param selection: a selection to define the seeds for clustering
        via the selection; the result of select used for the clustering
        seed
        @return: a new triple store
        @rtype: L{sparqlGraph<rdflib.sparql.sparqlGraph>}
        """
        if self._isCombined():
            return self.parent1.clusterForward(selection) + self.parent2.clusterForward(selection)
        clusterF = SPARQLGraph()
        for row in self.select(selection):
            for r in row:
                # NOTE(review): the decompiler lost the handler and its
                # exception type.  Presumably check_subject rejects values
                # that cannot be a subject (e.g. literals); such seeds are
                # skipped on a best-effort basis.
                try:
                    check_subject(r)
                    self.triples.clusterForward(r, clusterF)
                except Exception:
                    continue
        return clusterF

    def clusterBackward(self, selection):
        """
        Backward clustering, using all the results of the query as seeds
        (when appropriate).  It is based on the usage of the
        L{cluster backward<rdflib.sparql.sparqlGraph.clusterBackward>}
        method for triple store.

        @param selection: a selection to define the seeds for clustering
        via the selection; the result of select used for the clustering
        seed
        @return: a new triple store
        @rtype: L{sparqlGraph<rdflib.sparql.sparqlGraph>}
        """
        if self._isCombined():
            return self.parent1.clusterBackward(selection) + self.parent2.clusterBackward(selection)
        clusterB = SPARQLGraph()
        for row in self.select(selection):
            for r in row:
                self.triples.clusterBackward(r, clusterB)
        return clusterB

    def cluster(self, selection):
        """
        Cluster: a combination of L{Query.clusterBackward} and
        L{Query.clusterForward}.

        @param selection: a selection to define the seeds for clustering
        via the selection; the result of select used for the clustering
        seed
        @return: the sum of the backward and forward clusters
        """
        return self.clusterBackward(selection) + self.clusterForward(selection)

    def describe(self, selection, forward=True, backward=True):
        """
        The DESCRIBE Form in the SPARQL draft is still in state of flux,
        so this is just a temporary method, in fact.  It may not
        correspond to what the final version of describe will be (if it
        stays in the draft at all, that is).  At present, it is simply a
        wrapper around L{cluster}.

        @param selection: a selection to define the seeds for clustering
        via the selection; the result of select used for the clustering
        seed
        @param forward: cluster forward yes or no
        @type forward: Boolean
        @param backward: cluster backward yes or no
        @type backward: Boolean
        @return: a triple store; empty when both directions are disabled
        """
        if forward and backward:
            return self.cluster(selection)
        if forward:
            return self.clusterForward(selection)
        if backward:
            return self.clusterBackward(selection)
        return SPARQLGraph()